This kernel presents a quick data analysis of 911 calls using Google Visualization (the googleVis package). The analysis focuses mainly on two aspects: the geographical location and the timestamp of the calls.
library(googleVis)
library(sqldf)
library(ggmap)
library(ggplot2)
# Force English messages and the "C" time locale for this session.
# NOTE(review): presumably done so that locale-dependent text, such as the day
# names produced by weekdays() further down, comes out in English on any
# machine -- confirm.
Sys.setenv(LANG = "en")
Sys.setlocale("LC_TIME", "C")
## [1] "C"
# Read the raw call records, then keep only the columns this analysis uses:
# the coordinates, the call title and the call timestamp.
data <- read.csv(file = "911.csv")
df <- data[c("lat", "lng", "title", "timeStamp")]

# Print a per-column overview of the reduced data set.
summary(df)
## lat lng title
## Min. :30.33 Min. :-95.60 Traffic: VEHICLE ACCIDENT -:27981
## 1st Qu.:40.10 1st Qu.:-75.39 Traffic: DISABLED VEHICLE -: 9240
## Median :40.14 Median :-75.30 Fire: FIRE ALARM : 6790
## Mean :40.16 Mean :-75.32 EMS: RESPIRATORY EMERGENCY : 6077
## 3rd Qu.:40.23 3rd Qu.:-75.21 EMS: CARDIAC EMERGENCY : 6025
## Max. :41.17 Max. :-75.00 EMS: FALL VICTIM : 5904
## (Other) :58501
## timeStamp
## 2015-12-10 17:40:01: 8
## 2015-12-23 14:12:01: 7
## 2016-04-21 17:57:01: 7
## 2015-12-23 16:07:01: 6
## 2015-12-29 10:12:01: 6
## 2016-02-02 07:22:02: 6
## (Other) :120478
Before the analysis, we first preprocess the data to obtain a tidier dataset. First, we split the title of each event into its type and its detail.
Next, we deal with the timeStamp of the 911 calls.
# ---- Pre-processing: derive the columns used by the chart helpers ----

# Return the n-th piece of every element of `x` after splitting it on the
# literal separator `sep`; NA where an element has fewer than `n` pieces.
nth_token <- function(x, sep, n) {
  pieces <- strsplit(as.character(x), sep, fixed = TRUE)
  vapply(pieces, function(p) p[n], character(1))
}

# The chart helpers below filter on `Event_type` and tabulate `Event_detail`,
# but no visible step creates these columns. Derive them from `title`, whose
# values look like "EMS: FALL VICTIM": the text before the first colon is the
# type, the remainder is the detail. Existing columns are left untouched.
if (!"Event_type" %in% names(df)) {
  df$Event_type <- trimws(sub(":.*$", "", as.character(df$title)))
}
if (!"Event_detail" %in% names(df)) {
  df$Event_detail <- trimws(sub("^[^:]*:", "", as.character(df$title)))
}

# `timeStamp` looks like "2015-12-10 17:40:01": a date and a time separated by
# one space. Each split is done once for the whole column rather than row by
# row.
df$Date <- as.Date(nth_token(df$timeStamp, " ", 1))
df$Time <- nth_token(df$timeStamp, " ", 2)
df$Hour <- as.numeric(nth_token(df$Time, ":", 1))

# Day name of each call (text depends on the LC_TIME locale set above).
df$Days <- weekdays(df$Date)

# Part of the day, using right-closed hour bands:
#   0-6 Midnight, 7-12 Morning, 13-18 Afternoon, 19-24 Evening.
# A missing hour yields NA instead of stopping the script; an hour above 24
# (not possible for a valid timestamp) also yields NA.
df$Period <- as.character(cut(
  df$Hour,
  breaks = c(-Inf, 6, 12, 18, 24),
  labels = c("Midnight", "Morning", "Afternoon", "Evening")
))
Now the data are all in the form we want. I will organise the analysis by event type so that the whole procedure is transparent and efficient.
To keep the analysis and the plots direct and efficient, I wrap each analysis in its own function so that the subsequent steps can simply reuse them.
A: Where do these calls happen?
# Base map centred on 'Philadelphia' (zoom 9, hybrid tiles), created once and
# reused by Map() below, which only adds a point layer on top of it.
# NOTE(review): presumably this fetches tiles from an online map service and
# therefore needs network access (and possibly an API key) -- confirm against
# the installed ggmap version.
map_d<-qmap('Philadelphia', zoom = 9, maptype = 'hybrid')
# Draw the call locations on top of the base map `map_d`.
#
# item: 'All' to draw every call, coloured by its Event_type; otherwise an
#       Event_type value ('EMS', 'Fire' or 'Traffic'), in which case only the
#       matching calls are drawn, in that type's fixed colour.
#
# Draws the plot and returns the value of plot() for it.
#
# NOTE(review): this function's name masks base::Map() for the rest of the
# session; it is kept because the calls further down rely on it.
Map <- function(item) {
  color <- list('EMS' = 'Yellow', 'Fire' = 'red', 'Traffic' = 'blue')

  if (item == 'All') {
    # Colour is mapped from the data, so it goes inside aes().
    layer <- geom_point(
      data = df,
      aes(x = lng, y = lat, color = Event_type),
      alpha = 0.5
    )
  } else {
    # `[[` extracts the colour string itself; `[` would hand geom_point() a
    # one-element list instead of a colour.
    layer <- geom_point(
      data = df[df$Event_type == item, ],
      aes(x = lng, y = lat),
      color = color[[item]],
      alpha = 0.5
    )
  }

  plot(map_d + layer)
}
B: Calls among days of a week
# Column chart of the number of calls per day of the week.
#
# item: 'All' to count every call, otherwise the Event_type value to keep.
# Returns the chart object built by gvisColumnChart() (800 x 600).
days <- function(item) {
  # The data frame must be called `tmp`: the SQL text below refers to it by
  # that name.
  tmp <- if (item == 'All') df else df[df$Event_type == item, ]

  # One row per day name with its number of calls.
  tmp <- sqldf('SELECT Days,count(Days) as freq from tmp group by Days')

  chart_size <- list(width = 800, height = 600)
  gvisColumnChart(tmp, options = chart_size)
}
C: Calls among periods
# Pie chart of the share of calls falling in each part of the day.
#
# item: 'All' to count every call, otherwise the Event_type value to keep.
# Returns the chart object built by gvisPieChart() (600 x 400).
period <- function(item) {
  # The data frame must be called `tmp`: the SQL text below refers to it by
  # that name.
  tmp <- if (item == 'All') df else df[df$Event_type == item, ]

  # One row per Period label with its number of calls.
  tmp <- sqldf('SELECT Period,count(Period) as freq from tmp group by Period')

  chart_size <- list(width = 600, height = 400)
  gvisPieChart(tmp, options = chart_size)
}
D: Calls among date
# Calendar heat map of the number of calls on each date.
#
# item: 'All' to count every call, otherwise the Event_type value to keep.
# Returns the chart object built by gvisCalendar() (800 x 440).
DT <- function(item) {
  # The data frame must be called `tmp`: the SQL text below refers to it by
  # that name.
  tmp <- if (item == 'All') df else df[df$Event_type == item, ]

  # One row per date with its number of calls.
  tmp <- sqldf('SELECT Date,count(Date) as freq from tmp group by Date')

  # Calendar styling handed to the chart as text. The continuation lines start
  # at column 0 on purpose so that the string is exactly the original one.
  calendar_style <- "{yearLabel: { fontName: 'Times-Roman',
fontSize: 32, color: '#1A8763', bold: true},
cellSize: 20,
cellColor: { stroke: 'red', strokeOpacity: 0.2 },
focusedCellColor: {stroke:'red'}}"

  chart_options <- list(
    title = "Calls Records Around the Year",
    width = 800,
    height = 440,
    calendar = calendar_style
  )

  gvisCalendar(tmp, datevar = "Date", numvar = "freq", options = chart_options)
}
E: Calls among Hours
# Column chart of the number of calls in each hour of the day.
#
# item: 'All' to count every call, otherwise the Event_type value to keep.
# Returns the chart object built by gvisColumnChart() (800 x 400).
HR <- function(item) {
  # The data frame must be called `tmp`: the SQL text below refers to it by
  # that name.
  tmp <- if (item == 'All') df else df[df$Event_type == item, ]

  # One row per hour with its number of calls.
  tmp <- sqldf('SELECT Hour,count(Hour) as freq from tmp group by Hour')

  chart_size <- list(width = 800, height = 400)
  gvisColumnChart(tmp, options = chart_size)
}
F: The most frequent reasons for calling
# Paged table of call reasons (Event_detail), most frequent first.
#
# item: an Event_type value to restrict the table to that type, or 'All' to
#       tabulate every call. 'All' is accepted here for consistency with the
#       other chart helpers; previously it matched no rows and produced an
#       empty table.
# Returns the table object built by gvisTable() (300 x 600, paging enabled).
RE <- function(item) {
  # The data frame must be called `tmp`: the SQL text below refers to it by
  # that name.
  tmp <- if (item == 'All') df else df[df$Event_type == item, ]

  # One row per reason with its number of calls, sorted by that count.
  tmp <- sqldf('SELECT Event_detail,count(Event_detail) as freq from tmp group by Event_detail order by freq desc')

  gvisTable(tmp, options = list(page = 'enable', width = 300, height = 600))
}
A: Where do they happen
Map('All')
B:Calls among the week
days('All')
C: Calls by period of the day
period('All')
D: Date Records of the call
DT('All')
E:Calls in hours
HR('All')
A: Where do they happen
Map('EMS')
B:Calls among the week
days('EMS')
C: Calls by period of the day
period('EMS')
D: Date Records of the call
DT('EMS')
E:Calls in hours
HR('EMS')
F: Details of Calls
RE('EMS')
A: Where do they happen
Map('Traffic')
B:Calls among the week
days('Traffic')
C: Calls by period of the day
period('Traffic')
D: Date Records of the call
DT('Traffic')
E:Calls in hours
HR('Traffic')
F: Details of Calls
RE('Traffic')
A: Where do they happen
Map('Fire')
B:Calls among the week
days('Fire')
C: Calls by period of the day
period('Fire')
D: Date Records of the call
DT('Fire')
E:Calls in hours
HR('Fire')
F: Details of Calls
RE('Fire')